#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 09 20:36:05 2016

@author: za-chenhongrui
"""

import json

import lxml.html
import requests


def _to_native_str(value):
    """Return *value* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes; on Python 3 a
    ``bytes`` value is decoded from UTF-8.  A value that already is ``str``
    is returned unchanged, so the pieces can be joined without mixing types.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value.encode('utf-8')


def _parse_title(raw_tools):
    """Extract the ``title`` field from a ``data-tools`` attribute value.

    Returns ``None`` when the attribute is missing or empty, is not valid
    JSON, is not a JSON object, or has no ``title`` key.
    """
    if not raw_tools:
        return None
    try:
        # SECURITY: the attribute comes from a remote page, so it is parsed
        # as data with json.loads() and never executed with eval().
        tools = json.loads(raw_tools)
    except ValueError:
        return None
    if not isinstance(tools, dict):
        return None
    return tools.get('title')


def request(query, page=1):
    """Fetch one page of Baidu search results and print one line per hit.

    Each printed line is tab-separated: the query, the page number, the URL
    text displayed for the hit, and the hit's title.

    Args:
        query: Search keywords, sent as the ``wd`` request parameter.
        page: 1-based page number, sent as the ``pn`` offset
            ``(page - 1) * 10``; offsets below zero are clamped to 0.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    search_url = 'http://www.baidu.com/s'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/54.0.2840.71 Safari/537.36 Firefox/2.0.0.11',
        'Connection': 'keep-alive'
    }
    params = {
        'wd': query,
        'ie': 'utf-8',
        'pn': max(0, (page - 1) * 10)
    }
    # A timeout keeps a stalled connection from hanging the crawl forever.
    resp = requests.get(search_url, params=params, headers=headers,
                        timeout=10)
    if not resp.text.strip():
        # lxml refuses to parse an empty document; there is nothing to print.
        return

    html = lxml.html.fromstring(resp.text)
    links = html.xpath('//div[@class="f13"]//a[@class="c-showurl"]')
    tools = html.xpath('//div[@class="f13"]//div[@class="c-tools"]')

    # NOTE(review): the two node lists are paired by position, which assumes
    # every hit carries both a c-showurl link and a c-tools div -- confirm.
    # list() is required because zip() returns an iterator on Python 3.
    for link, tool in list(zip(links, tools))[:30]:
        # text_content() also collects text inside child elements and never
        # returns None, unlike .text.
        shown_url = link.text_content()
        title = _parse_title(tool.get('data-tools'))
        if title is None:
            # Skip hits whose metadata is missing or unparseable.
            continue
        content = '\t'.join([
            _to_native_str(query),
            str(page),
            _to_native_str(shown_url),
            _to_native_str(title),
        ])
        print(content)


def main():
    """Search every keyword for result pages 1 through 4.

    Each keyword gets the suffix '网站' ("website") appended before it is
    passed to request(), which prints the hits it finds.
    """
    for keyword in ['金融', '财经', '经济']:
        query = keyword + '网站'
        # range() iterates identically to xrange() here and, unlike xrange(),
        # also exists on Python 3.
        for page in range(1, 5):
            request(query, page)

if __name__ == '__main__':
    # Script entry point: run the crawl only when executed directly.
    main()
